import os
import time

import pandas as pd
import openai

# Experiment #1: zero-shot prompting.

# Credentials are read from the environment when available so that real keys
# never need to be written into this file. The "####" placeholders remain the
# fallback, so behaviour is unchanged when the variables are not set.
openai.organization = os.environ.get("OPENAI_ORGANIZATION", "####")
openai.api_key = os.environ.get("OPENAI_API_KEY", "####")

# Temperature setting for this run (it is embedded in the output file name);
# choose between 1.0 and 0.2 for replication.
temperature = 1.0

def isSameEntity(entity_a, entity_b):
    """Ask GPT-4 how confident it is that two names refer to the same entity.

    Sends one zero-shot user message and parses the reply as a float.

    Args:
        entity_a: First entity name, interpolated into the prompt.
        entity_b: Second entity name, interpolated into the prompt.

    Returns:
        The reply text converted with ``float``. The prompt asks for a value
        between 0 and 1, but that range is not enforced here.

    Raises:
        ValueError: If the reply cannot be parsed as a float.
        Exception: Any error raised by the OpenAI client is propagated
            unchanged.
    """
    response = openai.ChatCompletion.create(
        model="gpt-4",
        # Forward the module-level setting; without it the API default is used
        # and the configured temperature would have no effect on the request.
        temperature=temperature,
        messages=[
            {"role": "user", "content": f"How confident are you that the following entities, {entity_a} and {entity_b}, refer to the same entity, allowing for the possibility of minor typos?\nPlease return your confidence in the range of 0 and 1 only and no other words."}
        ],
    )
    return float(response["choices"][0]["message"]["content"])


# Upper bound on API attempts for a single row before process() gives up.
max_number_tries = 10

# Scores keyed by "<amicus>---<bonica>"; process() checks here before calling
# the API for a pair.
cache = dict()
def _score_with_retries(a, b, index):
    """Call ``isSameEntity(a, b)``, retrying with a growing delay on failure.

    Args:
        a: First entity name.
        b: Second entity name.
        index: Row position, used only in the progress message.

    Returns:
        The value returned by ``isSameEntity``.

    Raises:
        SystemExit: After ``max_number_tries`` failed attempts.
    """
    tries = 0
    while True:
        tries += 1
        try:
            return isSameEntity(a, b)
        # Only ordinary errors are retried; KeyboardInterrupt and SystemExit
        # propagate so the run can still be interrupted.
        except Exception as err:
            if tries >= max_number_tries:
                print("Exceeded "+ str(max_number_tries) + " tries ... ...")
                # NOTE: scores collected so far are not written to disk.
                # Exit with a non-zero status so the failure is visible.
                raise SystemExit(1) from err
            print("index "+ str(index) +" retrying ...", tries, repr(err))
            time.sleep(0.1 * tries)


def process(file_name):
    """Score every entity pair in a CSV file and write the result to disk.

    Reads ``file_name`` with pandas, scores each row's "amicus" and "bonica"
    values with ``isSameEntity``, stores the score in a new "chatGPT_score"
    column and writes the frame to ``experiment_1_temp{temperature}`` in the
    current working directory. Scores are memoised in the module-level
    ``cache`` so a repeated pair is only sent to the API once.

    Args:
        file_name: Path of the input CSV; it must contain the "amicus" and
            "bonica" columns.

    Raises:
        SystemExit: If a row still fails after ``max_number_tries`` attempts.
    """
    df = pd.read_csv(file_name)
    processed_file_name = f"experiment_1_temp{temperature}"
    df["chatGPT_score"] = None
    for i in range(df.shape[0]):
        row = df.iloc[i]
        a, b = row["amicus"], row["bonica"]
        key = a + '---' + b
        if key in cache:
            response = cache[key]
            print("cached")
        else:
            response = _score_with_retries(a, b, i)
            # Remember the score; without this the cache lookup never hits.
            cache[key] = response
        df.loc[i, "chatGPT_score"] = response
        print(i)
    df.to_csv(processed_file_name, index=False)

# Run the experiment over the evaluation set and report wall-clock timing.
start = time.time()
print(f"start {start}")
process("evaluation_set.csv")
end = time.time()
print(f"end {end}")
print(f"total {end - start}")
